昨天有跟大家分享過 Prometheus 是什麼東西,今天就來用 Python 實際操作看看囉~
程式碼在這~我的 GitHub
docker-prometheus/
├── alertmanager
│   └── config.yml
├── docker-compose.yml
├── grafana
│   └── config.monitoring
└── prometheus
    ├── alert.yml
    └── prometheus.yml
- prometheus 監控系統本體
- alertmanager 管理發送警報
- node_exporter 收集主機的運行指標,如 CPU、內存、磁盤
- grafana 搭配的視覺化 Web UI
# Compose file format version.
version: '3.3'

# Bridge network that every service in this file attaches to.
networks:
  monitoring:
    driver: bridge

# Named volumes: prometheus_data is mounted by the prometheus service,
# grafana_data by the grafana service.
volumes:
  grafana_data: {}
  prometheus_data: {}

services:
  # Prometheus server, published on host port 9090.
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    restart: always
    volumes:
      # Share the host's timezone definition with the container (read-only).
      - /etc/localtime:/etc/localtime:ro
      # Host ./prometheus directory (prometheus.yml, alert.yml) becomes the
      # container's config directory.
      - $PWD/prometheus/:/etc/prometheus/
      # Named volume backing the TSDB path given in `command` below.
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    networks:
      - monitoring
    links:
      - alertmanager
      - node_exporter
    # flask_web_1 is not a service defined in this file, so it cannot be listed
    # under `links` (Compose rejects links to undefined services). It is
    # referenced as an externally started container instead, aliased to `web`
    # so the `web:5000` scrape target in prometheus.yml resolves.
    # NOTE(review): Compose requires an externally created container to share
    # a network with this service - confirm flask_web_1 is attached to the
    # monitoring network.
    external_links:
      - flask_web_1:web
    expose:
      - '9090'
    ports:
      - '9090:9090'
  # Alertmanager, published on host port 9093.
  alertmanager:
    container_name: alertmanager
    image: prom/alertmanager
    restart: always
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'
    volumes:
      # Share the host's timezone definition with the container (read-only).
      - /etc/localtime:/etc/localtime:ro
      # Host ./alertmanager directory supplies the config.yml named in `command`.
      - $PWD/alertmanager/:/etc/alertmanager/
    networks:
      - monitoring
    ports:
      - '9093:9093'
    expose:
      - '9093'
     
  # node_exporter pinned to v0.18.0; port 9100 is exposed to the other
  # containers only and is not published on the host.
  node_exporter:
    container_name: node_exporter
    image: prom/node-exporter:v0.18.0
    restart: always
    command:
      # Point the exporter at the host's /proc and /sys mounted below.
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # The flag and its regex value are passed as two separate arguments.
      # `$$` is the Compose escape for a literal `$`.
      - '--collector.filesystem.ignored-mount-points'
      - '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'
    volumes:
      # All host paths are mounted read-only.
      - /etc/localtime:/etc/localtime:ro
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    networks:
      - monitoring
    expose:
      - '9100'
  # Grafana web UI, published on host port 3000.
  grafana:
    container_name: grafana
    image: grafana/grafana
    # Run the container process as UID 104.
    user: '104'
    restart: always
    # Admin and SMTP settings are supplied through this environment file.
    env_file:
      - $PWD/grafana/config.monitoring
    volumes:
      # Share the host's timezone definition with the container (read-only).
      - /etc/localtime:/etc/localtime:ro
      - grafana_data:/var/lib/grafana
      - $PWD/grafana/provisioning/:/etc/grafana/provisioning/
    networks:
      - monitoring
    # Start after prometheus and make it reachable by name.
    depends_on:
      - prometheus
    links:
      - prometheus
    ports:
      - '3000:3000'
└── prometheus
    ├── alert.yml
    └── prometheus.yml
# Global settings shared by all scrape jobs and rule evaluation.
global:
  # Default interval between scrapes of a target: every 3 seconds here
  # (Prometheus' own default is every 1 minute).
  scrape_interval: 3s
  # Interval between rule evaluations: every 3 seconds here
  # (Prometheus' own default is every 1 minute).
  evaluation_interval: 3s
  # scrape_timeout (how long a single scrape of one target may take) is not
  # set, so Prometheus chooses its default.
  # NOTE(review): the original note quoted 10s, which is longer than the 3s
  # interval above - confirm the effective timeout.
  # external_labels (also not set) would attach extra labels to the scraped
  # data before it is stored.

# Alertmanager instances that alerts are sent to.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'alertmanager:9093'

# Rules loaded from alert.yml and evaluated every evaluation_interval.
rule_files:
  - 'alert.yml'

# Targets to scrape.
scrape_configs:
  # Job name.
  - job_name: 'prometheus'
    # Per-job override of the global interval; each service can choose its own
    # scrape frequency here.
    scrape_interval: 3s
    static_configs:
      - targets:
          - 'prometheus:9090'

  - job_name: 'node_exporter'
    scrape_interval: 3s
    static_configs:
      - targets:
          - 'node_exporter:9100'

  - job_name: 'flask_web'
    scrape_interval: 3s
    static_configs:
      - targets:
          - 'web:5000'
# Alerting rules for the demo group.
groups:
  - name: demo
    rules:
      # Fires for any scrape target whose `up` metric is 0.
      - alert: service_down
        expr: up == 0
        # The condition must hold for 2 minutes before the alert fires, which
        # is what the description below promises; without `for` the alert
        # would fire on the first failed evaluation.
        for: 2m
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
 
# SMTP settings used for e-mail notifications. The quoted Chinese values are
# placeholders to be replaced with real account details.
global:
  smtp_smarthost: 'smtp.gmail.com:587'
  # Address the mail is sent from.
  smtp_from: '自己的@gmail.com'
  # Account used to authenticate with the SMTP server.
  smtp_auth_username: '自己的@gmail.com'
  # Password for that account (the placeholder asks for an app password,
  # not the personal account password).
  smtp_auth_password: '應用程式的密碼,不是你自己的'
  smtp_require_tls: true

route:
  group_by:
    - 'alertname'
  # Buffer alerts for 15s so others in the same group can be sent together.
  group_wait: 15s
  # How long to wait before sending further notifications for the same group,
  # measured from the last notification sent.
  group_interval: 3s
  # repeat_interval: 10m  # re-send every ten minutes
  receiver: live-monitoring

receivers:
  - name: 'live-monitoring'
    # Recipient.
    email_configs:
      - to: '收件人@gmail.com'
# Environment file loaded by the grafana service (env_file in docker-compose.yml).
# The Chinese values are placeholders to be replaced with real credentials.
GF_SECURITY_ADMIN_PASSWORD=密碼
GF_USERS_ALLOW_SIGN_UP=false
GF_SMTP_ENABLED=true
GF_SMTP_HOST=smtp.gmail.com:587
# Same address as GF_SMTP_FROM_ADDRESS; the placeholder previously repeated "@gmail.com".
GF_SMTP_USER=自己的@gmail.com
GF_SMTP_PASSWORD=應用程式的密碼,不是你自己的
GF_SMTP_FROM_ADDRESS=自己的@gmail.com
最後就是進到docker-prometheus 執行docker-compose up -d,就大功告成!!
參考資料